Skin bleaching interest was extracted from Google Trends as relative search volume (RSV) using the gTrends package. The time period of interest is the beggining of 2008 to the end 2018. Although data is available from 2004 multiple papers suggest that the data is unreliable before 2008.
overall_trend <- gtrends(keyword = "skin bleaching", geo = "US", time = "2008-01-01 2018-12-31", low_search_volume = T, gprop = "web")
overall_trend$interest_over_time %>%
select(date, hits) %>%
# unite("date", date:day,sep = "-") %>%
mutate( yearMonth = yearmonth(date)) %>%
select(yearMonth, hits, -date) %>%
as_tsibble(index = yearMonth) %>%
gg_subseries(hits) +
labs(y = "Relative Search Volume", x = "")
overall_trend$interest_over_time %>%
select(date, hits) %>%
# unite("date", date:day,sep = "-") %>%
mutate( yearMonth = yearmonth(date)) %>%
select(yearMonth, hits, -date) %>%
as_tsibble(index = yearMonth) %>%
autoplot()+
labs(y = "Relative Search Volume", x = "")
#Extracting state wide information
statedf <- c(state.abb, "DC") %>%
tibble(states = .) %>%
mutate(country = rep("US", 51)) %>%
select(country, states) %>%
unite("region" , country:states, sep = "-")
##########################################3
states <- read_csv("./statesTS.csv")
names(states) <- substring(names(states), 8)
#adding regions to data set
states %>%
#tibble() %>%
#unnest() %>%
separate(geo, into = c("country","state")) %>%
select(date,hits,state) %>%
mutate(yearmonth = yearmonth(date)) %>%
select(-date) %>%
as_tsibble(index = yearmonth, key = state) %>%
filter(state %in% c("NY","CA","TX","NJ")) %>%
gg_subseries(hits)
states_q <- states %>%
#tibble() %>%
#unnest() %>%
separate(geo, into = c("country","state")) %>%
select(date,hits,state) %>%
mutate(yearquater = yearquarter(date)) %>%
select(-date) %>%
group_by(yearquater, state) %>%
summarise(hits = sum(hits)) %>%
arrange(state) %>%
as_tsibble(index = yearquater, key = state)
state_region <- tibble(
name = c(state.name,"DistrictofColumbia"),
region = c(as.character(state.region), "South"),
state = c(state.abb, "DC")
)
states_q <- left_join(states_q, state_region, by = "state")
states_q %>%
mutate(state = factor(state)) %>%
filter(state %in% c("NY","CA","TX","NJ")) %>%
gg_subseries(hits)
dcmp <- states %>%
# tibble() %>%
#unnest() %>%
separate(geo, into = c("country","state")) %>%
select(date,hits,state) %>%
mutate(yearmonth = yearmonth(date)) %>%
select(-date) %>%
as_tsibble(index = yearmonth, key = state) %>%
#filter(state %in% c("NY","CA","NJ","TX")) %>%
model(stl = STL(hits ~ season(window = 13) + trend(window = 10), robust = T)) %>%
components()
states %>%
# tibble() %>%
# unnest() %>%
separate(geo, into = c("country","state")) %>%
select(date,hits,state) %>%
mutate(yearmonth = yearmonth(date)) %>%
select(-date) %>%
as_tsibble(index = yearmonth, key = state) %>%
# filter(state %in% c("NY","CA","NJ","TX")) %>%
autoplot(hits) +
theme(legend.position = "none") +
autolayer(dcmp, trend,col = "blue", alpha = 2)
dcmp %>% autoplot()
states_q %>%
autoplot(hits) +
theme(legend.position = "none") +
autolayer(dcmp, trend,col = "blue", alpha = 2)
dcmp2 <- states_q %>%
model(stl = STL(hits ~ season(window = 13) + trend(window = 10), robust = T)) %>%
components()
dcmp2 %>% autoplot() +
theme(legend.position = "none")
#dcmp %>% gg_subseries(season_year)
#dcmp %>% gg_season(hits, label = "right")
Trend and seasonal strength can help us understand which times series experienced the most change and the amount of variability within during the time period.
`%notin%` <- Negate(`%in%`)
test_s <- states %>%
# tibble() %>%
# unnest() %>%
separate(geo, into = c("country","state")) %>%
select(date,hits,state) %>%
mutate(yearmonth = yearmonth(date)) %>%
select(-date) %>%
as_tsibble(index = yearmonth, key = state) #%>%
# filter(state %in% c("NY","CA","NJ","TX","MD","DC","NV","GA", "FL","MI"))
#seasonal and trend strength
q1 <- states_q %>% #data by quarter
mutate(region = as_factor(region)) %>%
features(hits, feat_stl) %>%
ggplot(aes(x = trend_strength, y = seasonal_strength_year)) +
geom_point(#aes(
# col = ifelse(state %in% c("NY","CA","NJ","TX","MD","DC","NV","GA", "FL","MI"), "blue","red"),
# col = region)
) +
geom_label_repel(
aes(label = state ),
data = . %>% filter(state %in% c("NY","CA","NJ","TX","MD","DC","NV","GA", "FL","MI")),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
size = 3
) +
# theme(legend.position = "none")+
scale_fill_viridis_d(aes(col = region))+
labs(title = "States with Highest Regional SB RSV")
q2 <- states_q %>% #data by quarter
mutate(region = as_factor(region)) %>%
features(hits, feat_stl) %>%
ggplot(aes(x = trend_strength, y = seasonal_strength_year)) +
geom_point(#aes(
# col = ifelse(state %in% c("NY","CA","NJ","TX","MD","DC","NV","GA", "FL","MI"), "blue","red"),
# col = region)
) +
geom_label_repel(
aes(label = state ),
data = . %>% filter(trend_strength > 0.5 & seasonal_strength_year > 0.28),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
size = 3
) +
# theme(legend.position = "none")+
scale_fill_viridis_d(aes(col = region))+
labs(title = "Highlights")
##################################################
most_seasonal <- states_q %>%
features(hits, feat_stl) %>%
filter(trend_strength %notin% NaN ) %>%
filter(seasonal_strength_year == max(seasonal_strength_year))
t1<- states_q %>%
right_join(most_seasonal, by =c("state")) %>%
ggplot(aes(x = yearquater, y = hits)) + geom_line()+
ggtitle("Texus")
most_trend <- states_q %>%
features(hits, feat_stl) %>%
filter(trend_strength %notin% NaN ) %>%
filter(trend_strength == max(trend_strength))
c1<- states_q %>%
right_join(most_trend, by = c("state")) %>%
ggplot(aes(x = yearquater, y = hits)) + geom_line() +
ggtitle("California")
q1 + q2
t1 +c1
The plot with the states that have a his SB RSV when utilizing the regional data set from google trends data, now looking closely at their indivifual time series, it suggests states such as NJ, MD, DC, NV may have just always had high SB RSV and are changing as much as the other labeled states who excibit strong trends (increase) and seasonality (predictability) over time. The plot on the right depict a few states with relatively high seasonality and trend which end up all being states that we identified earlier as states with regional RSV above 72 (3rd quartile).
States surrounding FL, GA, MI, NY, CA; though they may not be among those who have high RSV they are interesting still becuase the show similar levels of trend and seasonal strength. This means that they have increased the most…..
ss <- states_q %>%
mutate(region = as_factor(region)) %>%
features(hits, feat_stl) %>%
na.omit()
##########################33 merging data set with shp file
state_m <-
geo_join(s, #STUSPS
ss,
by_sp = "STUSPS",
by_df = "state",
how = "inner" ) %>%
na.omit()
############################### creating legend
legend_tirtile <- function(var1,var2) {
#legend y axis and x axis tirtile intervals
y = round(
c(min(var1,na.rm = T),
quantile(var1,c(0.333,0.667)),
max(var1,na.rm = T)),
2
)
z = c(paste0(y[1]," -", "\n",
"<",y[2]),
paste0(y[2]," -", "\n",
"<", y[3]),
paste0(y[3]," -", "\n",
y[4]))
yy = round(
c(min(var2,na.rm = T),
quantile(var2,c(0.333,0.667)),
max(var2,na.rm = T)),
2
)
zz = c(paste0(yy[1]," -", "\n",
"<",yy[2]),
paste0(yy[2]," -", "\n",
"<", yy[3]),
paste0(yy[3]," -", "\n",
yy[4]))
#legend data frame
legend_scale <- data.frame(
co_var = c(rep(1, 3), rep(2, 3), rep(3, 3)),
trend.strength = c(rep(seq(1, 3, 1), 3)),
color = c("#F1F1F1", "#C3DEEE", "#A1D3EA",
"#F7DBE7", "#CAC8E3", "#A6BDDF",
"#F7C1CB", "#CAAEC8", "#A6A3C4")
)
# legend
legend <- ggplot() +
geom_tile(
data = legend_scale,
aes(x = co_var, y = trend.strength, fill = color)) +
scale_fill_identity() +
scale_x_discrete(limits=zz,
labels=zz)+
scale_y_discrete(limits=z,
labels=z)+
labs(y = "Trend Strength →"
) +
theme(
axis.title = element_text(size = 20),
axis.title.x = element_text( size=30, face = "bold"),
axis.title.y = element_text( size=30, face = "bold"),
axis.line = element_blank(),
# axis.text = element_blank(),
# axis.ticks = element_blank(),
panel.grid = element_blank(),
panel.background = element_blank(),
plot.margin = margin(10, 10, 10, 10),
plot.background = element_rect(fill = "transparent", color = NA),
axis.text.x = element_text(face="bold", color="#000000", size=22),
axis.text.y = element_text(face="bold", color="#000000", size=22)
)
legend
}
#legend plots
#legend = legend_tirtile(state_m$trend_strength, state_m$seasonal_strength_year) +
#labs(x = "Seasonal Strength →")
#ggsave("/Users/stevenlawrence/Desktop/cumc_github/sl4269.github.io/SBL_Global_TS.svg", plot = legend, width = 8, height = 6, bg = "transparent")
pal_bivariate <- colorNumeric(c("#F1F1F1", "#C3DEEE", "#A1D3EA",
"#F7DBE7", "#CAC8E3", "#A6BDDF",
"#F7C1CB", "#CAAEC8", "#A6A3C4"),
1:9)
bivariate_data<- function(x,y){
data <- tibble(yvar = y,
xvar = x
) %>%
mutate(y.val = case_when(
yvar <= yvar %>% quantile(0.333) ~ 3,
yvar > yvar %>% quantile(0.333) & yvar <= yvar %>% quantile(0.667) ~ 2,
yvar > yvar %>% quantile(0.667) ~1
),
x.val= case_when(
xvar <= xvar %>% quantile(0.333) ~ 1,
xvar > xvar %>% quantile(0.333) & xvar <= xvar %>% quantile(0.667) ~ 2,
xvar > xvar %>% quantile(0.667) ~3
),
color = case_when(
y.val == 1 & x.val == 1 ~ 7,
y.val == 1 & x.val == 2 ~ 8,
y.val == 1 & x.val == 3 ~ 9,
y.val == 2 & x.val == 1 ~ 4,
y.val == 2 & x.val == 2 ~ 5,
y.val == 2 & x.val == 3 ~ 6,
y.val == 3 & x.val == 1 ~ 1,
y.val == 3 & x.val == 2 ~ 2,
y.val == 3 & x.val == 3 ~ 3,
)
)
data$color
}
labels <-
paste0(
"State: ",
state_m$NAME, "<br/>",
"Regioan: ",
state_m$REGION, "<br/>",
"Trend Strength: ",
state_m$trend_strength, "<br/>",
"Seasonal Strength: ",
state_m$seasonal_strength_year) %>%
lapply(htmltools::HTML)
state_m %>%
leaflet(
width = "100%",
options = leafletOptions(zoomControl = FALSE)) %>%
addProviderTiles("Stadia.AlidadeSmooth") %>%
addPolygons(fillColor = state_m$trend_strength %>%
bivariate_data(state_m$seasonal_strength_year) %>% pal_bivariate(),
fillOpacity = 1,
stroke = FALSE,
smoothFactor = 0) %>%
addPolygons(
label = lapply(labels, htmltools::HTML),
labelOptions = labelOptions(textsize = "12px"),
fillColor = NA,
fillOpacity = 0,
color = "gray",
weight = 1,
opacity = 1,
highlightOptions = highlightOptions(weight = 2)) %>%
# addResetMapButton() %>%
# addFullscreenControl() %>%
# suspendScroll(sleepNote = F, sleepOpacity = 1) %>%
addControl(
html = "<img src = 'https://sl4269.github.io/SBL_Global_TS.svg' width = '200' height = '200'>",
position = "bottomright",
className = "legend-bivar"
)